import os
import jieba
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# --- Configuration: source text and a CJK-capable font for rendering ---
file_path = 'D:\\学习\\MSC\\COMP5571\\project\\果麦麦 汇总.txt'  # path to the input text file
font_path = 'C:\\Users\\86136\\AppData\\Local\\Microsoft\\Windows\\Fonts\\黑体.ttf'  # TrueType font with Chinese glyphs (required for CJK word clouds)

# Read the whole corpus into memory as UTF-8 text.
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()

# Segment the Chinese text with jieba.
# Keep only tokens that are longer than one character AND contain
# non-whitespace: jieba passes whitespace/newline runs through as tokens,
# so a bare `len(word) > 1` check would otherwise count strings like
# "\r\n" or "  " as words, polluting the frequency table.
words = jieba.cut(text)
word_list = [word for word in words if len(word) > 1 and word.strip()]

# Tally token frequencies.
word_counts = Counter(word_list)

# Print the 100 most frequent tokens with their counts.
most_common_words = word_counts.most_common(100)
for word, count in most_common_words:
    print(f"{word}: {count}")

# Build the word cloud from the full frequency table (WordCloud itself
# caps the rendered vocabulary via its max_words default).
wordcloud = WordCloud(font_path=font_path, width=800, height=400, background_color='white').generate_from_frequencies(word_counts)

# Render the cloud without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the coordinate axes
plt.show()
